*****************************************************************
* 
* Purpose:	
*
*	Construct the file reliability.dta from RCN source data
*
*	"Grantmaking, Grading on a Curve, and the Paradox of Relative Evaluation in Nonmarkets" 
*   Jérôme Adda and Marco Ottaviani
*   
*   
*
* Input:
*
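*	RCN data:   Excel workbooks (.xlsx/.xls) with applications, referees and grades, plus the CSV predictions produced by the Machine Learning module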
*
* Output:
*
*       reliability.dta
*
*
* Author:	
*
*       Jerome Adda and Marco Ottaviani
*
* Date created: 
*	20 Aug 2023
*	
********************************************************************* 	

* Root folder of the replication package. c(username) is the name of the
* user running Stata, so the folder is looked up under that user's Dropbox.
global path "/Users/`c(username)'/Dropbox/ValutazioneBocconi_ERC/ERC/ReplicationQJE"

* Work from the root folder: every relative save/use below resolves here.
cd "${path}"

******************************************************************************
***** REFEREES ***********
******************************************************************************
* Builds referees.dta by stacking the "Fagekspert" sheet of the seven
* application workbooks. Each freshly imported workbook is the master of the
* append, so the rows of the most recent workbook come first.
local refbook1 "0624_Søknader_2002-2006_til_DESREV-prosjektet.xlsx"
local refcell1 "B4:I33426"
local refbook2 "0624_Søknader_2007-2009_til_DESREV-prosjektet.xlsx"
local refcell2 "B4:I24967"
local refbook3 "0624_Søknader_2010-2012_til_DESREV-prosjektet.xlsx"
local refcell3 "B4:I45046"
local refbook4 "0624_Søknader_2013-2014_til_DESREV-prosjektet.xlsx"
local refcell4 "B4:I34582"
local refbook5 "0624_Søknader_2015-2016_til_DESREV-prosjektet.xlsx"
local refcell5 "B4:I40483"
local refbook6 "0624_Søknader_2017-2018_til_DESREV-prosjektet.xlsx"
local refcell6 "B4:I43284"
local refbook7 "0624_Søknader_2019_til_DESREV-prosjektet.xlsx"
local refcell7 "B4:I22660"

forvalues k = 1/7 {
	clear
	import excel "$path/`refbook`k''", sheet("Fagekspert") cellrange(`refcell`k'') firstrow
	if `k' > 1 {
		* add the workbooks stacked so far; force tolerates string/numeric
		* type clashes between workbooks instead of stopping
		append using referees, force
	}
	if `k' < 7 {
		* the last workbook is saved only after the clean-up below
		save referees, replace
	}
}

* English variable names
rename Prosjektnummer idproject
rename Kjønn          sex_referee
rename PersonId       idreferee
rename Fødselsdato    dob_referee
rename Rollenavn      name_referee
rename Tittel         title_referee
rename Akademisk      phd_referee
drop Rolle

* date of birth displayed as a daily date; year of birth derived from it
format dob_referee %td
gen yob_referee = year(dob_referee)

sort idproject
save referees, replace

******************************************************************************
***** APPLICANTS ***********
******************************************************************************
* Builds applicants.dta by stacking the "Søknader" sheet of the seven
* application workbooks (newest workbook is the master of each append).
local appbook1 "0624_Søknader_2002-2006_til_DESREV-prosjektet.xlsx"
local appcell1 "B4:S28687"
local appbook2 "0624_Søknader_2007-2009_til_DESREV-prosjektet.xlsx"
local appcell2 "B4:S15751"
local appbook3 "0624_Søknader_2010-2012_til_DESREV-prosjektet.xlsx"
local appcell3 "B4:S16917"
local appbook4 "0624_Søknader_2013-2014_til_DESREV-prosjektet.xlsx"
local appcell4 "B4:S11731"
local appbook5 "0624_Søknader_2015-2016_til_DESREV-prosjektet.xlsx"
local appcell5 "B4:S12637"
local appbook6 "0624_Søknader_2017-2018_til_DESREV-prosjektet.xlsx"
local appcell6 "B4:S12407"
local appbook7 "0624_Søknader_2019_til_DESREV-prosjektet.xlsx"
local appcell7 "B4:S6307"

forvalues k = 1/7 {
	clear
	import excel "$path/`appbook`k''", sheet("Søknader") cellrange(`appcell`k'') firstrow
	if `k' > 1 {
		append using applicants, force
	}
	save applicants, replace
}

* English variable names
rename Prosjektnummer             idproject
rename PersonId                   idperson
rename Søktbeløp                  amount_claimed
rename Revidertbudsjett           amount_won
rename Prosjekttittelprimærspråk  title
rename Prosjektsammendrag         abstract
rename Prosjektledernavnsiste     pi_name
rename Prosjektansvarlignavnsiste pi_institution
rename Søknadsår                  year
rename Prosjektfase               outcome
rename Aktivitetskodeognavn       program_name

* programme code = first word of the programme label; abstract length in characters
gen program_code = word(program_name, 1)
gen abstract_length = length(abstract)

* translate the project phase into English
local phase_no "Vurderes Avslag Bevilgning Mottatt Avsluttet"
local phase_en "Considered Rejected Granted Received Completed"
forvalues j = 1/5 {
	local from : word `j' of `phase_no'
	local to   : word `j' of `phase_en'
	replace outcome = "`to'" if outcome == "`from'"
}

sort year idperson idproject
order idproject idperson year pi_name outcome title abstract amount_claimed amount_won abstract_length

* FRIPRO = 1 when the programme code is one of the codes listed in $friproname
gen FRIPRO = 0
global friproname "FRIBIO FRIBIO2 FRIENERGI FRIHUM FRIHUMSAM FRIMED FRIMED2 FRIMEDBIO FRIMUF FRINAT FRINATEK FRIPRO FRISAM FRISAM2 FRITEK"
foreach code of global friproname {
	replace FRIPRO = 1 if program_code == "`code'"
}

sort idperson idproject
save applicants, replace

** Sex of applicant
* Reads the gender workbook, attaches it to applicants.dta and fills in
* unknown genders from first names and a list of hand-checked cases.
clear
import excel "$path/0810_Søknader_2002-2019_til_DESREV-prosjektet_PL_kjønn.xlsx", sheet("Søknader") cellrange(B4:D98467) firstrow
rename Prosjektnummer idproject
rename PersonId       idperson
rename Kjønn          sex
sort idperson idproject
merge idperson idproject using applicants
tab _merge
* gender rows without a matching application are discarded
* NOTE(review): applications without a gender row (_merge==2) stay in the data
* with sex=="" and are not reached by the UNKNOWN-based rules below - confirm intended
drop if _merge == 1
drop _merge

* hand corrections by person id (still in the Norwegian coding)
replace sex = "KVINNE" if inlist(idperson, 109381, 120217)
replace sex = "MANN"   if inlist(idperson, 10657, 66592, 94749, 173704, 186135, 186476)

* Norwegian -> English coding
replace sex = "MALE"    if sex == "MANN"
replace sex = "FEMALE"  if sex == "KVINNE"
replace sex = "UNKNOWN" if sex == "UKJENT"

* name components and their soundex codes
gen firstname = word(pi_name, 1)
gen lastname  = word(pi_name, -1)
gen first_name_soundex = soundex(firstname)
gen surname_soundex    = soundex(lastname)
gen all_soundex        = soundex(pi_name)

* share of rows coded FEMALE among all rows carrying the same first name
egen mfemale = mean(sex == "FEMALE"), by(firstname)

* impute unknown gender when the first name is (almost) never / mostly female
replace sex = "MALE"   if mfemale < 0.02 & sex == "UNKNOWN"
replace sex = "FEMALE" if mfemale > 0.7  & sex == "UNKNOWN"

* remaining unknowns settled by first name
foreach nm in Beth Kajsa Carine Lidia Cathrin Zora Taina Eeva Ozgu {
	replace sex = "FEMALE" if firstname == "`nm'" & sex == "UNKNOWN"
}
foreach nm in Juan Sten {
	replace sex = "MALE" if firstname == "`nm'" & sex == "UNKNOWN"
}

* individual overrides by full name (applied whatever the current value is)
foreach who in "Nicola Varchetta" "Inderjit Singh Marjara" "Dan Roger Sträng" "Ken Ove Heiberg" "Haakon Gunnerud" "Dan Lysne" {
	replace sex = "MALE" if pi_name == "`who'"
}
foreach who in "Vidya Athreya" "Andrea Birkeland" "Agne Johannessen" {
	replace sex = "FEMALE" if pi_name == "`who'"
}

drop mfemale
sort idproject
save applicants, replace

* Keep a person-id / name list of the PIs in applicants.dta. It is merged
* further below on idapplicant, in the hope that the PIs also show up as applicants.
use applicants, clear
keep idperson pi_name
rename idperson idapplicant
rename pi_name  appli_name
sort idapplicant
save applicant_names, replace
************************************************************************
*** info on date of birth and gender of applicants (and not of the current PI)
* Five workbooks, one per period, go through the same steps; each result is
* parked in tempapplicants1-5 before being stacked.
local dobbook1  "1209_Søknader_2002-2006_til_DESREV-prosjektet_PL_fødselsdato.xlsx"
local dobsheet1 "Soknader 02-06"
local dobcell1  "B4:J27745"
local dobbook2  "1209_Søknader_2007-2010_til_DESREV-prosjektet_PL_fødselsdato.xlsx"
local dobsheet2 "Søknader 07-10"
local dobcell2  "B4:J20633"
local dobbook3  "1209_Søknader_2011-2014_til_DESREV-prosjektet_PL_fødselsdato.xlsx"
local dobsheet3 "Søknader 11-14"
local dobcell3  "B4:J22572"
local dobbook4  "1209_Søknader_2015-2017_til_DESREV-prosjektet_PL_fødselsdato.xlsx"
local dobsheet4 "Rapport 2"
local dobcell4  "B4:J19255"
local dobbook5  "1209_Søknader_2018-2019_til_DESREV-prosjektet_PL_fødselsdato (1).xlsx"
local dobsheet5 "Søknader 18-19"
local dobcell5  "B4:J12432"

forvalues k = 1/5 {
	clear
	import excel "$path/`dobbook`k''", sheet("`dobsheet`k''") cellrange(`dobcell`k'') firstrow
	rename Prosjektnummer idproject
	rename PersonId       idapplicant
	rename Kjønn          sexapplicant
	rename Fødselsdato    dob
	rename Fraår          fromyear
	rename Tilår          toyear
	drop Rolle
	replace sexapplicant = "MALE"    if sexapplicant == "MANN"
	replace sexapplicant = "FEMALE"  if sexapplicant == "KVINNE"
	replace sexapplicant = "UNKNOWN" if sexapplicant == "UKJENT"
	sort idproject fromyear toyear idapplicant
	* keep the person who applied, not the one handling the grant at the time:
	* first row per project in the (fromyear, toyear, idapplicant) ordering
	count
	by idproject: keep if _n == 1
	count
	sort idproject
	* attach the application record and keep matched projects only
	merge idproject using applicants
	keep if _merge == 3
	drop _merge
	save tempapplicants`k', replace
}

* tempapplicants5 is still in memory: add the other four, save, clean up
forvalues j = 1/4 {
	append using tempapplicants`j'
}
sort idproject
save tempapplicants, replace
forvalues j = 1/5 {
	erase tempapplicants`j'.dta
}

* match against applicants.dta once more and keep matched projects only
sort idproject
merge idproject using applicants
keep if _merge == 3
drop _merge

* add the applicant's name where the applicant id is found among the PI ids
sort idapplicant
merge idapplicant using applicant_names
drop if _merge == 2
drop _merge

sort idproject
save applicants, replace

******************************************************************************
******  Grades ******************
******************************************************************************
* NOTE: søknadsfristår is wrong. The correct date is the one in the applicants file.
* The grade grids are stacked into grades.dta; criteria get English names
* where the authors chose to rename them.

* Grades in 2013-2019
clear
import excel "$path/Boccini University - Researcher project + Young research talents.xls", sheet("Grid Results") firstrow
rename prosjektnummer                 idproject
rename Vitenskapeligdristighetogfagl  Boldnessandscientificrenewal
rename FormidlingogkommunikasjonFP    Disseminationandcommunication
rename ForskningskvalitetFP           Excellence
rename VirkningerogeffekterFP         Impact
rename Gjennomføringsplanogressursbeh Implementationplanandresource
rename Samletvurderingfrafageksperte  Overallassessmentoftherefere
rename Relevansiforh                  Relevance
rename GjennomføringFP                Qualityandefficiencyoftheim
rename VitenskapeligkvalitetFP        Scientificmerit
rename ProsjektgruppenUF              Theprojectgroup
rename ProsjektlederUF                Theprojectmanager
rename ekspertId                      idreferee
* (renames for Centredirector, Principalinvestigators and Research are left disabled)
sort idproject
save grades, replace

* Some additional grades for 2013-2018
clear
import excel "$path/Boccini University - Other support FRIPRO + SFF.xls", sheet("Grid Results") firstrow
rename prosjektnummer idproject
rename ekspertId      idreferee
sort idproject
* converted to string before the append
tostring søknadstypevariant, replace
append using grades
sort idproject
save grades, replace

** Grades in 2020 - 2021
clear
import excel "$path/OUT-5433-v6.xls", sheet("Grid Results") firstrow
rename prosjektnummer                idproject
rename ForskningskvalitetFP          Excellence
rename VirkningerogeffekterFP        Impact
rename Samletvurderingfrafageksperte Overallassessmentoftherefere
rename GjennomføringFP               Qualityandefficiencyoftheim
rename ekspertId                     idreferee
* columns not carried over from this grid
drop tema Relevansiforh Vitenskapeligdristighetogfagl FormidlingogkommunikasjonFP ///
     Gjennomføringsplanogressursbeh ProsjektgruppenUF VitenskapeligkvalitetFP ///
     ProsjektlederUF ProsjektkvalitetforFoUprosjek Prosjektlederogprosjektgruppen

append using grades
sort idproject
save grades, replace

** Grades in 2011-2012
clear
import excel "$path/out-5433-2012-2011.xls", sheet("Grid Results") firstrow
rename prosjektnummer                 idproject
rename ekspertId                      idreferee
rename Samletvurderingfrafageksperte  Overallassessmentoftherefere
rename VitenskapeligkvalitetFP        Scientificmerit
rename Gjennomføringsplanogressursbeh Implementationplanandresource
rename FormidlingogkommunikasjonFP    Disseminationandcommunication
rename Relevansiforh                  Relevance
drop tema ProsjektkvalitetforFoUprosjek Prosjektlederogprosjektgruppen søknadstypevariant AD

append using grades
sort idproject
save grades, replace

* Additional grades for the period 2002-2019: one main grade per project,
* stored on its own in maingrade.dta and then matched to the grid rows
clear
import excel "$path/0824_Søknader_2002-2019_til_DESREV-prosjektet_Hovedkarakter.xlsx", sheet("Søknader") cellrange(B4:E98466) firstrow clear
rename Prosjektnummer idproject
rename Hovedkarakter  overall_grade
drop D E
sort idproject
save maingrade, replace

merge idproject using grades
tab _merge
drop _merge
* referee id as a number; force turns non-numeric entries into missing
destring idreferee, replace force
sort idproject idreferee
save grades, replace



**** Load data on research field
* Joins referees, grades and applicants, fills project-level values across
* the rows of a project, recodes the assessment role and saves the result.

clear
use referees
sort idproject idreferee
merge idproject idreferee using grades
tab _merge
drop _merge

sort idproject
merge idproject using applicants
tab _merge
drop _merge

rename søknadsfristår yearGrade
rename vurderingstype role
rename bevilget       amount_obtained
rename søkt           amount_asked

** Filling in blank rows
* non-missing values sort first, so row 1 of a project holds a grade if any row does
sort idproject overall_grade
by idproject: replace overall_grade = overall_grade[1] if overall_grade == .
drop if overall_grade == .

* same first-row fill for the other project-level numeric variables
global fillvar "amount_obtained amount_asked year"
foreach v of global fillvar {
	sort idproject `v'
	by idproject: replace `v' = `v'[1] if `v' == .
}

* empty strings sort first, so the last row of a project holds a field label if any row does
sort idproject fagomrade
by idproject: replace fagomrade = fagomrade[_N] if fagomrade == ""

* role labels carry a leading digit so that they sort in this order
replace role = "0 AGREED"      if role == "PANEL"
replace role = "1 READER"      if role == "SAKSORDFORER"
replace role = "2 READER"      if role == "ANNENSAKSORDFORER"
replace role = "3 READER"      if role == "PANELDELTAGER"
replace role = "4 EXPERT"      if role == "EKSPERT"
replace role = "5 NON GRADING" if role == ""
sort idproj role

* grade columns as numbers; force turns non-numeric entries into missing
foreach v of varlist Excellence Scientificmerit Prosjektlede Overallassess Theprojectman Impact Qualityandeff {
	destring `v', replace force
}
save merged&cleaned_grade, replace



******************************************************************************
****** Classifying RCN Projects into broad ERC panels
******************************************************************************
* panelProg (LS / PE / SH) is assigned in successive stages: programme code,
* then aktivite, then tema, then fagomrade (exact label, then keyword). A later
* stage only touches rows that are still unclassified.

* Years 2002-2019
use merged&cleaned_grade, clear
drop if inlist(role, "0 AGREED", "5 NON GRADING")
keep if inlist(søknadstype, "Forskerprosjekt", "Unge forskertalenter")

gen program = word(program_name, 1)

* Keep the FRIPRO projects ...
gen flag = regexm(program, "FRIBIO|FRIENERG|FRIHUM|FRIMED|FRIMUF|FRINAT|FRISAM|FRITEK|FRIPRO")
* ... and among them only research projects at individual level
replace flag = 0 if inlist(Prosjekttype, "Institusjonsstøtte", "Andre")
keep if flag == 1

* stage 1: programme code
gen panelProg = ""
replace panelProg = "LS" if inlist(program, "FRIBIO", "FRIBIO2", "FRIMEDBIO", "FRIMED", "FRIMED2")
replace panelProg = "PE" if inlist(program, "FRIENERGI", "FRINAT", "FRINATEK", "FRITEK")
replace panelProg = "SH" if inlist(program, "FRIHUM", "FRIHUMSAM", "FRISAM", "FRISAM2")

* stage 2: activity code
foreach a in FRINATEK NANO2021 PETROMAKS2 {
	replace panelProg = "PE" if aktivite == "`a'" & panelProg == ""
}
foreach a in FRIMEDBIO BIONÆR HAVBRUK HAVBRUK2 {
	replace panelProg = "LS" if aktivite == "`a'" & panelProg == ""
}
replace panelProg = "SH" if aktivite == "FRIHUMSAM" & panelProg == ""

* stage 3: thematic label
foreach t in "23. Climate change research" "28. Mathematics" "29. Physics" "30. Chemistry" ///
             "31. Materials science and nanotechnology" "32. Technology and engineering" ///
             "33. ICT" "34. Geosciences" "35. Medicin and technology" {
	replace panelProg = "PE" if tema == "`t'" & panelProg == ""
}
foreach t in "1. Health and welfare services research" "2. Public health" ///
             "3. Human immunology and infectious diseases" "4. Cancer" ///
             "5. Neuroscience and mental health" "6. Other medical and health research" ///
             "7. Human molecular life sciences" "24. Marine ecosystems" ///
             "25. Pollution and other effects on ecosystems" ///
             "26. Ecology and evolutionary biology" ///
             "27. Molecular biology and physiology in animals/plants/microorganisms" {
	replace panelProg = "LS" if tema == "`t'" & panelProg == ""
}
foreach t in "8. Children. Adolescence. Family. Gender equality." "9. Teaching and learning" ///
             "10. Education and society" "11. Welfare and living conditions" ///
             "14. International development. Relations. Conflict and safety." ///
             "17. State. Governance. Democracy." "18. Culture and the media" ///
             "19. Historical studies and cultural studies" "21. Language" ///
             "22. Law. Judicialization. Justice. Crime." "SAMFUNN" {
	replace panelProg = "SH" if tema == "`t'" & panelProg == ""
}

* stage 4: research field, exact label
replace panelProg = "SH" if inlist(fagomrade, "Humaniora", "Samfunnsvitenskap", "Samfunnsvitenskap / Humaniora") & panelProg == ""
replace panelProg = "PE" if fagomrade == "Teknologi" & panelProg == ""
replace panelProg = "LS" if fagomrade == "Medisin og helsefag" & panelProg == ""

* stage 5: research field, keyword anywhere in the label (first hit wins)
replace panelProg = "SH" if regexm(lower(fagomrade), "humaniora") & panelProg == ""
replace panelProg = "PE" if regexm(lower(fagomrade), "teknologi") & panelProg == ""
replace panelProg = "LS" if regexm(lower(fagomrade), "medisin")   & panelProg == ""

* unclassified rows are dropped
* NOTE(review): "panel" is presumably an abbreviation of panelProg - confirm no variable is literally named panel
keep if panel~=""

* one row per project, text fields and broad panel only
sort idproject
by idproject: keep if _n == 1
keep title abstract idproject panelProg
save projects_to_classify1, replace
export delimited using "analysisRCN", replace

* Years 2020-2021
* Broad panel (LS / PE / SH) for the projects graded in OUT-5433-v6.xls.
* Stage 1 uses the majority panel among the first words of Fagkode1-3, looked
* up in classification_broad; stages 2 and 3 fall back on fagomrade.
* classification_broad.dta is not built in this file and, as the old merge
* syntax requires, is expected to be sorted by w.

* Folder holding the confidential field-code workbooks. It defaults to the
* location used by the original author; define the global privatepath before
* running this file to read the workbooks from somewhere else.
if "$privatepath" == "" {
	global privatepath "/Users/Adda/Library/CloudStorage/OneDrive-UniversitàCommercialeLuigiBocconi/norwayPrivate"
}

clear
import excel "$privatepath/soknader2020fagkoder.xls", sheet("soknad") firstrow
rename Prosjnr idproject
rename Prosjtittel title
keep idproject title Fagkode* Tema*
sort idproject
save temp, replace

clear
import excel "$privatepath/soknader2021fagkoder.xls", sheet("soknad") firstrow
rename Prosjnr idproject
rename Prosjtittel title
* note Tema4 is empty in 2021
keep idproject title Fagkode* Tema1 Tema2 Tema3
append using temp
sort idproject
save temp, replace

clear
import excel "$path/OUT-5433-v6.xls", sheet("Grid Results") firstrow
rename prosjektnummer idproject
rename søknadsfristår year
keep idproject year fagomrade

sort idproject
merge idproject using temp
tab _merge
* drop projects without grades
drop if _merge == 2
drop _merge

gen panelProg = ""
gen fagom1 = word(fagomrade, 1)

* look up the panel attached to the first word of each of the three field codes
forvalues j = 1/3 {
	gen w = lower(word(Fagkode`j', 1))
	sort w
	merge w using classification_broad
	* lookup entries that match no project would otherwise stay in the data
	* as extra observations with a missing idproject
	drop if _merge == 2
	rename panel panel`j'
	drop w _merge
}

* number of field codes pointing to each panel, and the corresponding shares
* (shares are missing when no field code could be looked up)
gen sh = (panel1 == "sh") + (panel2 == "sh") + (panel3 == "sh")
gen ls = (panel1 == "ls") + (panel2 == "ls") + (panel3 == "ls")
gen pe = (panel1 == "pe") + (panel2 == "pe") + (panel3 == "pe")
gen pe_ = pe / (pe + ls + sh)
gen ls_ = ls / (pe + ls + sh)
gen sh_ = sh / (pe + ls + sh)
replace panelProg = "PE" if pe_ > 0.5 & pe_ ~= .
replace panelProg = "SH" if sh_ > 0.5 & sh_ ~= .
replace panelProg = "LS" if ls_ > 0.5 & ls_ ~= .

* first classify the projects that fall squarely into one of the categories
replace panelProg = "SH" if fagomrade == "Humaniora" & panelProg == ""
replace panelProg = "SH" if fagomrade == "Samfunnsvitenskap" & panelProg == ""
replace panelProg = "SH" if fagomrade == "Humaniora / Samfunnsvitenskap" & panelProg == ""
replace panelProg = "SH" if fagomrade == "Samfunnsvitenskap / Humaniora" & panelProg == ""
replace panelProg = "PE" if fagomrade == "Matematikk og naturvitenskap" & panelProg == ""
replace panelProg = "PE" if fagomrade == "Teknologi" & panelProg == ""
replace panelProg = "LS" if fagomrade == "Medisin og helsefag" & panelProg == ""
replace panelProg = "LS" if fagomrade == "Landbruks- og fiskerifag" & panelProg == ""

* classify the rest based on the first label that appears more related to the abstract
* (fagom1 is the first word of fagomrade)
replace panelProg = "SH" if regexm(lower(fagom1), "humaniora") & panelProg == ""
replace panelProg = "SH" if regexm(lower(fagom1), "samfunnsvitenskap") & panelProg == ""
replace panelProg = "PE" if regexm(lower(fagom1), "teknologi") & panelProg == ""
replace panelProg = "PE" if regexm(lower(fagom1), "matematikk") & panelProg == ""
replace panelProg = "LS" if regexm(lower(fagom1), "medisin") & panelProg == ""
replace panelProg = "LS" if regexm(lower(fagom1), "landbruks") & panelProg == ""

* one row per project
sort idproject
by idproject: keep if _n == 1
keep title idproject panelProg
save projects_to_classify2, replace


* add information on the abstract that is in different files
* The 2020 and 2021 applicant workbooks are read the same way; the 2020 rows
* are parked in temp.dta and appended below the 2021 rows.
local absbook2020 "0128_Desrev_søknadsfrist_2020_Søkere.xlsx"
local absbook2021 "0128_Desrev_søknadsfrist_2021_Søkere.xlsx"
foreach yr in 2020 2021 {
	clear
	import excel "$path/`absbook`yr''", sheet("Rapport 1") cellrange(B4:L4411) firstrow
	rename Prosjektnummer     idproject
	rename Prosjektsammendrag abstract
	rename PersonId           idperson
	drop if idproject == .
	keep idproject abstract
	gen year = `yr'
	if `yr' == 2020 {
		save temp, replace
	}
}
append using temp

* abstracts shorter than 100 characters are discarded
gen l = length(abstract)
drop if l < 100

* keep only the projects that also received a broad panel
sort idproject
merge idproject using projects_to_classify2
keep if _merge == 3
drop _merge l year

* one row per project, panel first
sort idproject
by idproject: keep if _n == 1
sort panelProg idproject
order panelProg idproject
save projects_to_classify2, replace
export delimited using "analysisRCN2020_2021New", replace


** Note: projects_to_classify1.dta and projects_to_classify2.dta are sent to the
** Machine Learning module to get the classification into ERC categories

*****************************************************************
**  Allocating projects to ERC codes
*****************************************************************

*** The csv files come from the Machine Learning module that allocates
*** projects to ERC panels based on abstract, title and aggregate field.
*** They are stacked through temp.dta, each new file on top of the previous ones.
local pred1 "SH_predictions_ext.csv"
local pred2 "LS_predictions_ext.csv"
local pred3 "PE_predictions_ext.csv"
local pred4 "SH_predictions.csv"
local pred5 "LS_predictions.csv"
local pred6 "PE_predictions.csv"

forvalues k = 1/6 {
	import delimited "$path/`pred`k''", varnames(1) clear
	if `k' > 1 {
		append using temp
	}
	if `k' < 6 {
		save temp, replace
	}
}

* project id as a number; force turns non-numeric ids into missing
destring idproject, replace force
rename predlabels predERCcode
sort idproject
save AllRCNPredicted, replace

* average of the pe1-pe10, ls1-ls9 and sh1-sh6 columns by predicted ERC code
collapse (mean) pe1-pe10 ls1-ls9 sh1-sh6, by(predERC)
save RCN_Predicted, replace

******************************************************************************
******************************************************************************
****** Collecting data on  Grades 
******************************************************************************
****. NOTE  søknadsfristår is wrong. Correct date is the one in applicants file

* Grades in 2013-2019
clear
import excel "$path/Boccini University - Researcher project + Young research talents.xls", sheet("Grid Results") firstrow
rename prosjektnummer idproject
rename Vitenskapeligdristighetogfagl Boldnessandscientificrenewal 
*rename Centredirector 
rename FormidlingogkommunikasjonFP Disseminationandcommunication 
rename ForskningskvalitetFP Excellence 
rename VirkningerogeffekterFP Impact 
rename Gjennomføringsplanogressursbeh Implementationplanandresource 
rename Samletvurderingfrafageksperte Overallassessmentoftherefere 
*rename Principalinvestigators 
rename GjennomføringFP  Qualityandefficiencyoftheim 
*rename Research 
rename VitenskapeligkvalitetFP Scientificmerit 
rename ProsjektgruppenUF Theprojectgroup 
rename ProsjektlederUF Theprojectmanager
rename ekspertId idreferee
rename vurderingstype role
rename søknadsfristår year
sort idproject
save grades,replace

* Some Additional grades for 2013-2018
clear
import excel "$path/Boccini University - Other support FRIPRO + SFF.xls", sheet("Grid Results") firstrow
rename prosjektnummer idproject
rename ekspertId idreferee
rename søknadsfristår year
sort idproject
tostring søknadstypevariant,replace
append using grades
sort idproject
save grades,replace

clear
import excel "$path/OUT-5433-v6.xls", sheet("Grid Results") firstrow
drop Vitenskapeligdristighetogfagl FormidlingogkommunikasjonFP tema Gjennomføringsplanogressursbeh ProsjektgruppenUF  VitenskapeligkvalitetFP ProsjektlederUF ProsjektkvalitetforFoUprosjek  Prosjektlederogprosjektgruppen Relevansiforholdtilutlysning
rename prosjektnummer idproject
rename søknadsfristår year
rename ForskningskvalitetFP Excellence 
rename VirkningerogeffekterFP Impact 
rename Samletvurderingfrafageksperte Overallassessmentoftherefere 
rename GjennomføringFP  Qualityandefficiencyoftheim 
rename ekspertId idreferee
rename vurderingstype role

append using grades
sort idproject
save grades,replace


* Additional grades for the period 2002-2019
* Reads the main grade (Hovedkarakter) per project number from the 2002-2019
* applications workbook and merges it onto the referee-level grades file.
clear
import excel "$path/0824_Søknader_2002-2019_til_DESREV-prosjektet_Hovedkarakter.xlsx", sheet("Søknader") cellrange(B4:E98466) firstrow clear
rename Prosjektnummer idproject
rename Hovedkarakter maingrade
* The variables D and E are not used below
drop D E
sort idproject

* Old-syntax match-merge on idproject; master (in memory) is the main-grade
* file, using is grades.dta. Both are sorted by idproject beforehand.
merge idproject using grades
tab _merge
* _merge==1: project numbers found only in the main-grade file, i.e. with no
* row in grades.dta
drop if _merge==1
drop _merge
* force: idreferee values that cannot be read as numbers become missing
destring idreferee,replace  force
sort idproject idreferee
* grades.dta is saved here, before the role recoding and sample restrictions
* that follow
save grades,replace

* Recode the reviewer role into labels carrying a numeric prefix
replace role = "0 AGREED"      if role == "PANEL"
replace role = "1 READER"      if role == "SAKSORDFORER"
replace role = "2 READER"      if role == "ANNENSAKSORDFORER"
replace role = "3 READER"      if role == "PANELDELTAGER"
replace role = "4 EXPERT"      if role == "EKSPERT"
replace role = "5 NON GRADING" if role == ""
sort idproj role

* Convert the grade columns to numeric; with force, entries that are not
* numbers become missing
foreach gradevar of varlist Excellence Scientificmerit Prosjektlede Overallassess Theprojectman Impact Qualityandeff {
    destring `gradevar', replace force
}
order idproject year role idreferee Boldnessandscientificrenewal-Relevansiforholdtilutlysning

* Keep individual reviews only: drop the PANEL rows ("0 AGREED") and rows
* with an empty role ("5 NON GRADING")
drop if inlist(role, "0 AGREED", "5 NON GRADING")

* Restrict to the two application types analysed
keep if inlist(søknadstype, "Forskerprosjekt", "Unge forskertalenter")


*****************************************************************

* Merge the review-level grades with AllRCNPredicted on idproject and keep
* only projects found in both files. The result is saved as temppredERC,
* which is the input to the reliability computations below.
* NOTE(review): predERCcode and the sh, ls and pe probability columns used
* later presumably come from AllRCNPredicted - confirm.
sort idproject
merge idproject using AllRCNPredicted
tab _merge
keep if _merge==3
drop _merge


* Pad three-character panel codes to four characters by inserting "0" after
* the first two characters (e.g. "SH1" becomes "SH01")
replace predERCcode=substr(predERCcode,1,2)+"0"+substr(predERCcode,3,1) if length(predERCcode)==3
gen panelcodeERC=predERCcode

save temppredERC,replace

*****************************************************************
** Determine  inter-rater reliability
*****************************************************************
global ERCPanels "SH01 SH02 SH03 SH04 SH05 SH06 LS01 LS02 LS03 LS04 LS05 LS06 LS07 LS08 LS09 PE01 PE02 PE03 PE04 PE05 PE06 PE07 PE08 PE09 PE10"

*----------------------------------------------------------------
* Table II: descriptive statistics on the full review sample.
* Nothing here depends on the panel, so it is computed once,
* before the panel loop, instead of once per panel.
*----------------------------------------------------------------
mat TABLEIIa_b=J(6,5,.)
mat TABLEIIa_se=J(6,5,.)

u temppredERC,clear
rename Overallassessmentoftherefere grade
keep grade idproject idreferee predERCcode
* R = number of reviews by the referee, r = running index within referee
sort idreferee
by idreferee:gen R=_N
by idreferee:gen r=_n
* I = number of reviews of the application, i = running index within application
sort idproj
by idproj:gen I=_N
by idproj:gen i=_n
disp "Number of distinct applications"
count if i==1
disp "Number of distinct reviewers"
count if r==1

disp "Number of applications per reviewer"
su R if r==1
mat TABLEIIa_b[2,1]=r(N)
mat TABLEIIa_b[2,2]=round(r(mean),0.01)
mat TABLEIIa_b[2,3]=round(r(sd),0.01)
mat TABLEIIa_b[2,4]=round(r(min),1)
mat TABLEIIa_b[2,5]=round(r(max),1)

disp "Number of reviewer per application"
su I if i==1
mat TABLEIIa_b[1,1]=r(N)
mat TABLEIIa_b[1,2]=round(r(mean),0.01)
mat TABLEIIa_b[1,3]=round(r(sd),0.01)
mat TABLEIIa_b[1,4]=round(r(min),1)
mat TABLEIIa_b[1,5]=round(r(max),1)

disp "Average grade"
su grade
mat TABLEIIa_b[3,1]=r(N)
mat TABLEIIa_b[3,2]=round(r(mean),0.01)
mat TABLEIIa_b[3,3]=round(r(sd),0.01)
mat TABLEIIa_b[3,4]=round(r(min),1)
mat TABLEIIa_b[3,5]=round(r(max),1)

disp "Average grade, PE"
su grade if substr(predERCcode,1,2)=="PE"
mat TABLEIIa_b[4,1]=r(N)
mat TABLEIIa_b[4,2]=round(r(mean),0.01)
mat TABLEIIa_b[4,3]=round(r(sd),0.01)
mat TABLEIIa_b[4,4]=round(r(min),1)
mat TABLEIIa_b[4,5]=round(r(max),1)

disp "Average grade, LS"
su grade if substr(predERCcode,1,2)=="LS"
mat TABLEIIa_b[5,1]=r(N)
mat TABLEIIa_b[5,2]=round(r(mean),0.01)
mat TABLEIIa_b[5,3]=round(r(sd),0.01)
mat TABLEIIa_b[5,4]=round(r(min),1)
mat TABLEIIa_b[5,5]=round(r(max),1)

disp "Average grade, SH"
su grade if substr(predERCcode,1,2)=="SH"
mat TABLEIIa_b[6,1]=r(N)
mat TABLEIIa_b[6,2]=round(r(mean),0.01)
mat TABLEIIa_b[6,3]=round(r(sd),0.01)
mat TABLEIIa_b[6,4]=round(r(min),1)
mat TABLEIIa_b[6,5]=round(r(max),1)

** Table II
mat rownames TABLEIIa_b = Number~of~reviewers~per~app Number~of~applications~per~rev Application~grades Application~grades~PE Application~grades~LS Application~grades~SH
mat colnames TABLEIIa_b = c1 c2 c3 c4 c5
est2tex TABLEIIa, digit(3) replace dropall preserve suppress collabels(Obs Mean Std.~Dev. Min Max)

*----------------------------------------------------------------
* Agreement statistics, one row of KAPPA per ERC panel.
* KAPPA starts with a placeholder row of missings that is removed
* after the loop.
*----------------------------------------------------------------
mat KAPPA=J(1,13,.)

foreach panel of global ERCPanels {
    disp in red "PANEL:  `panel'"

    u temppredERC,clear
    rename Overallassessmentoftherefere grade
    keep grade idproject idreferee predERCcode

    * Drop referees who appear only once in the full sample
    sort idreferee
    by idreferee:gen R=_N
    drop if R==1

    * Within the panel keep applications with 2 to 7 remaining reviews
    keep if predERCcode=="`panel'"
    sort idproject
    by idproject:gen N=_N
    by idproject:gen ref=_n
    drop if N==1
    tab N
    drop if N>7

    * One row per application, with grade1 ... gradeN in columns
    keep grade idproject ref N
    reshape wide grade ,i(idproject) j(ref)
    * Make sure grade3 to grade7 exist even if no application in this panel
    * has that many reviews; the pair assignment below refers to all of them
    forvalues k=3/7 {
        capture gen grade`k'=.
    }

    * One row per unordered pair of reviews: N*(N-1)/2 rows per application
    gen expand=N*(N-1)/2
    expand expand
    sort idproject
    by  idproject: gen n=_n

    * Row n of an application with N reviews receives the n-th pair (a,b),
    * a<b, in lexicographic order: (1,2) (1,3) ... (1,N) (2,3) ... (N-1,N).
    * The pairs are generated rather than listed by hand, so all 21 pairs
    * of a 7-review application are distinct and complete.
    gen gradeA=.
    gen gradeB=.
    forvalues m=2/7 {
        local pair=0
        local lastA=`m'-1
        forvalues a=1/`lastA' {
            local firstB=`a'+1
            forvalues b=`firstB'/`m' {
                local pair=`pair'+1
                qui replace gradeA=grade`a' if n==`pair'&N==`m'
                qui replace gradeB=grade`b' if n==`pair'&N==`m'
            }
        }
    }

    drop if gradeB==.|gradeA==.

    * r(b) and r(se) hold the agreement coefficients and their standard
    * errors, r(N) the number of rated pairs: 13 columns in total
    kappaetc gradeA gradeB,categories(1 2 3 4 5 6 7) wgt(power 0.25)
    mat kappa=r(b),r(se),r(N)

    mat KAPPA= KAPPA \ kappa

}
* Remove the placeholder first row; rows 2 to 26 are the 25 panels
mat KAPPA=KAPPA[2..26,1..13]

* Turn the KAPPA matrix into a dataset with one observation per ERC panel
drop _all
svmat KAPPA

* Observation k corresponds to the k-th panel listed in the global ERCPanels
gen panelcodeERC=""
local obs=0
foreach panel of global ERCPanels {
    local obs=`obs'+1
    replace panelcodeERC="`panel'" in `obs'
}

* Columns 1-6 get the coefficient names, columns 7-12 the matching _se
* names, and column 13 is the sample size
local col=0
foreach stat in PctAgree Brennan Cohen Fleiss Gwet Krippendorff {
    local col=`col'+1
    local secol=`col'+6
    rename KAPPA`col' `stat'
    rename KAPPA`secol' `stat'_se
}
rename KAPPA13 SampSiz

* Deviation of each coefficient from its mean within the broad domain
* (first two characters of the panel code: Pre) and from the mean over all
* panels (Post)
gen broadpanel=substr(panelcodeERC,1,2)
foreach stat of varlist PctAgree Brennan Cohen Fleiss Gwet Krippendorff {
    egen `stat'Pre=mean(`stat'),by(broadpanel)
    egen `stat'Post=mean(`stat')
    generate devPre`stat'=`stat'-`stat'Pre
    generate devPost`stat'=`stat'-`stat'Post
}

sort panelcodeERC
corr PctAgree Cohen Fleiss Gwet Krippendorff
save reliability,replace


* Storage for the bootstrap coefficients: one column per iteration
matrix TABLE_b = J(15,500,0)


* ERC budget data from the "Raw data" sheet, with lower-case variable names
clear
import excel "$path/erc2020_complete.xlsx", sheet("Raw data") firstrow
rename *,lower

* grant = first character of granttype, converted to a number
generate grant = substr(granttype,1,1)
destring grant, replace

* construct the budget by year, grant type and panel
collapse (sum) budget (first) paneldescription, by(year grant panel)

rename panel panelcodeERC
sort panelcodeERC
save dataERC, replace



forvalues iter=1(1)500 {
    disp in red "Iteration: " "`iter'"
    use temppredERC,clear

    * Allocate applications based on the predicted probabilities.
    * temppredERC has one row per review, so the uniform draw is made once
    * per application and shared by all of its reviews. Independent draws
    * per row would send reviews of the same application to different
    * panels and break up the reviewer pairs used for the agreement
    * statistics.
    * NOTE(review): no set seed is visible in this part of the file;
    * confirm that the seed is fixed earlier so the bootstrap is reproducible.
    * NOTE(review): assumes the sh, ls and pe probabilities are constant
    * within idproject - confirm against AllRCNPredicted.
    qui gen u=uniform()
    qui bysort idproject: replace u=u[1]

    * Turn the panel probabilities into cumulative probabilities within
    * each domain
    * SH fields
    forvalues i=2(1)6 {
        local j=`i'-1
        qui replace sh`i'=sh`i'+sh`j'
    }
    * LS fields
    forvalues i=2(1)9 {
        local j=`i'-1
        qui replace ls`i'=ls`i'+ls`j'
    }
    * PE fields
    forvalues i=2(1)10 {
        local j=`i'-1
        qui replace pe`i'=pe`i'+pe`j'
    }

    * P = 1 + number of cumulative thresholds that u exceeds, i.e. the
    * index of the drawn panel within the domain
    gen P=.
    qui replace P=(u>sh1)+(u>sh2)+(u>sh3)+(u>sh4)+(u>sh5)+(u>sh6) if sh1~=.
    qui replace P=(u>ls1)+(u>ls2)+(u>ls3)+(u>ls4)+(u>ls5)+(u>ls6)+(u>ls7)+(u>ls8)+(u>ls9) if ls1~=.
    qui replace P=(u>pe1)+(u>pe2)+(u>pe3)+(u>pe4)+(u>pe5)+(u>pe6)+(u>pe7)+(u>pe8)+(u>pe9)+(u>pe10) if pe1~=.
    qui replace P=P+1

    * Build the four-character panel code: domain prefix plus two-digit index
    qui tostring P,replace
    qui replace P="0"+P if length(P)==1
    qui gen newpredERC=substr(predERCcode,1,2)+P

    * Share of rows whose drawn panel equals the originally predicted one
    qui gen correct=newpredERC==predERCcode
    su correct
    qui replace predERCcode=newpredERC
    qui save temp,replace

mat KAPPA=J(1,13,.)

global ERCPanels "SH01 SH02 SH03 SH04 SH05 SH06 LS01 LS02 LS03 LS04 LS05 LS06 LS07 LS08 LS09 PE01 PE02 PE03 PE04 PE05 PE06 PE07 PE08 PE09 PE10"

foreach panel of global ERCPanels {
*disp in red "PANEL:  `panel'" 

use temp,clear
rename Overallassessmentoftherefere grade
qui keep grade idproject idreferee predERCcode
sort idreferee
qui by idreferee:gen R=_N
qui drop if R==1

qui keep if predERCcode=="`panel'"
sort idproject
qui by idproject:gen N=_N
qui by idproject:gen ref=_n
qui drop if N==1


qui drop if N>7

keep grade idproject ref N
qui reshape wide grade ,i(idproject) j(ref)
capture gen grade5=.
capture gen grade6=.
capture gen grade7=.
gen expand=N*(N-1)/2
qui expand expand

sort idproject
qui by  idproject: gen n=_n
qui gen gradeA=grade1
qui gen gradeB=grade2
qui replace gradeB=grade3 if n==2&N==3
qui replace gradeA=grade2 if n==3&N==3
qui replace gradeB=grade3 if n==3&N==3

qui replace gradeB=grade3 if n==2&N==4
qui replace gradeB=grade4 if n==3&N==4
qui replace gradeA=grade2 if n==4&N==4
qui replace gradeB=grade3 if n==4&N==4
qui replace gradeA=grade2 if n==5&N==4
qui replace gradeB=grade4 if n==5&N==4
qui replace gradeA=grade3 if n==6&N==4
qui replace gradeB=grade4 if n==6&N==4

qui replace gradeB=grade3 if n==2&N==5
qui replace gradeB=grade4 if n==3&N==5
qui replace gradeB=grade5 if n==4&N==5
qui replace gradeA=grade2 if n>=5&N==5
qui replace gradeB=grade3 if n==5&N==5
qui replace gradeB=grade4 if n==6&N==5
qui replace gradeB=grade5 if n==7&N==5
qui replace gradeA=grade3 if n>=8&N==5
qui replace gradeB=grade4 if n==8&N==5
qui replace gradeB=grade5 if n==9&N==5
qui replace gradeA=grade4 if n==10&N==5
qui replace gradeB=grade5 if n==10&N==5

qui replace gradeB=grade3 if n==2&N==6
qui replace gradeB=grade4 if n==3&N==6
qui replace gradeB=grade5 if n==4&N==6
qui replace gradeB=grade6 if n==5&N==6
qui replace gradeA=grade2 if n>=6&N==6
qui replace gradeB=grade3 if n==6&N==6
qui replace gradeB=grade4 if n==7&N==6
qui replace gradeB=grade5 if n==8&N==6
qui replace gradeB=grade6 if n==9&N==6
qui replace gradeA=grade3 if n>=10&N==6
qui replace gradeB=grade4 if n==10&N==6
qui replace gradeB=grade5 if n==11&N==6
qui replace gradeB=grade6 if n==12&N==6
qui replace gradeA=grade4 if n>=13&N==6
qui replace gradeB=grade5 if n==13&N==6
qui replace gradeB=grade6 if n==14&N==6
qui replace gradeA=grade5 if n==15&N==6
qui replace gradeB=grade6 if n==15&N==6

qui replace gradeB=grade3 if n==2&N==7
qui replace gradeB=grade4 if n==3&N==7
qui replace gradeB=grade5 if n==4&N==7
qui replace gradeB=grade6 if n==5&N==7
qui replace gradeB=grade7 if n==6&N==7
qui replace gradeA=grade2 if n>=7&N==7
qui replace gradeB=grade3 if n==7&N==7
qui replace gradeB=grade4 if n==8&N==7
qui replace gradeB=grade5 if n==9&N==7
qui replace gradeB=grade6 if n==10&N==7
qui replace gradeB=grade7 if n==11&N==7
qui replace gradeA=grade3 if n>=12&N==7
qui replace gradeB=grade4 if n==12&N==7
qui replace gradeB=grade5 if n==13&N==7
qui replace gradeB=grade6 if n==14&N==7
qui replace gradeB=grade7 if n==15&N==7
qui replace gradeA=grade4 if n>=16&N==7
qui replace gradeB=grade5 if n==16&N==7
qui replace gradeB=grade6 if n==17&N==7
qui replace gradeB=grade7 if n==18&N==7
qui replace gradeA=grade5 if n==19&N==7
qui replace gradeB=grade6 if n==19&N==7
qui replace gradeB=grade7 if n==20&N==7
qui replace gradeA=grade6 if n==21&N==7
qui replace gradeB=grade7 if n==22&N==7

qui drop if gradeB==.|gradeA==.

qui kappaetc gradeA gradeB,categories(1 2 3 4 5 6 7) wgt(power 0.25)
mat kappa=r(b),r(se),r(N)

mat KAPPA= KAPPA \ kappa

}
mat KAPPA=KAPPA[2..26,1..13]

qui drop _all
qui svmat KAPPA

gen panelcodeERC=""
local i=1
foreach panel of global ERCPanels {
qui replace panelcodeERC="`panel'" in `i'
local i=`i'+1
}

rename KAPPA1 PctAgree
rename KAPPA2 Brennan
rename KAPPA3 Cohen
rename KAPPA4 Fleiss
rename KAPPA5 Gwet
rename KAPPA6 Krippendorff
rename KAPPA7 PctAgree_se
rename KAPPA8 Brennan_se
rename KAPPA9 Cohen_se
rename KAPPA10 Fleiss_se
rename KAPPA11 Gwet_se
rename KAPPA12 Krippendorff_se
rename KAPPA13 SampSiz

gen broadpanel=substr(panelcodeERC,1,2)
foreach var of varlist PctAgree Brennan Cohen Fleiss Gwet Krippendorff {
qui egen `var'Pre=mean(`var'),by(broadpanel)	
qui egen `var'Post=mean(`var')
qui ge devPre`var'=`var'-`var'Pre
qui ge devPost`var'=`var'-`var'Post
}
sort panelcodeERC
save reliabilityBoot,replace

clear
import excel "$path/erc2020_complete.xlsx", sheet("Raw data") firstrow
rename *,lower
gen grant=substr(granttype,1,1)
destring grant,replace
* construct the budget by year, grant type and panel
collapse (sum) budget (first) paneldescription,by(year grant panel)
rename panel panelcodeERC

sort year panelcodeERC grant 
merge year panelcodeERC grant using ERCSuccess
drop _merge

sort panelcodeERC
save ERCcompleteBoot,replace

*use ERCcompleteBoot,clear
qui merge panelcodeERC using reliabilityBoot
drop _merge
drop if year==2007

* construct the term sigma_i-\bar{sigma}_it
foreach var of varlist PctAgree Brennan Cohen Fleiss Gwet Krippendorff {
gen dev`var'=-devPre`var'
replace dev`var'=-devPost`var' if year>2015
}



egen total=sum(budget),by(year grant)
gen ratio=budget/total*100
sort grant panelcodeERC year
by grant panelcodeERC:gen init=ratio[1]
gen ratio2=ratio/init
tab grant,gen(Grant)
gen panel2=substr(panelcodeERC,1,2)
egen tot_eval_year_grant=sum(evaluated),by(year grant)
gen applic=evaluated/tot_eval_year_grant

egen grp=group(panelcodeERC grant)
qui tab grp,gen(Group)
xtset grp year, yearly
qui tab year,gen(Year)
foreach var of varlist ratio applic success devPctAgree devBrennan devCohen devFleiss devGwet devKrippendorff {
qui su `var'
qui replace `var'=`var'/r(sd)	
}


**************************************************************************************
**   Regression Results
**************************************************************************************



** POOLED OVER ALL DISCIPLINES
* Percent Agreement
*areg ratio PostPctAgree Post,a(grp)
areg ratio devPctAgree,a(grp)
mat b=get(_b)
mat TABLE_b[1,`iter']=b[1,1]
* Cohen Kappa
areg ratio devCohen ,a(grp)
mat b=get(_b)
mat TABLE_b[2,`iter']=b[1,1]
* Fleiss Kappa
areg ratio devFleiss,a(grp)
mat b=get(_b)
mat TABLE_b[3,`iter']=b[1,1]
* Gwet AC
areg ratio devGwet ,a(grp)
mat b=get(_b)
mat TABLE_b[4,`iter']=b[1,1]
* Brennan AC
areg ratio devBrennan ,a(grp)
mat b=get(_b)
mat TABLE_b[5,`iter']=b[1,1]

areg applic devPctAgree ,a(grp)
mat b=get(_b)
mat TABLE_b[6,`iter']=b[1,1]

areg applic devCohen ,a(grp)
mat b=get(_b)
mat TABLE_b[7,`iter']=b[1,1]

areg applic devFleiss ,a(grp)
mat b=get(_b)
mat TABLE_b[8,`iter']=b[1,1]

areg applic devGwet ,a(grp)
mat b=get(_b)
mat TABLE_b[9,`iter']=b[1,1]

areg applic devBrennan ,a(grp)
mat b=get(_b)
mat TABLE_b[10,`iter']=b[1,1]


}
* Bootstrap results as a dataset: one observation per iteration, one variable
* TABLE_bK per row K of TABLE_b
mat TABLE_b=TABLE_b'
drop _all
svmat TABLE_b

save bootstrapERC2,replace

* Mean (m), standard deviation (s), 2nd percentile (l) and 98th percentile
* (h) of each of the ten stored coefficients across iterations
local means
local sds
local lows
local highs
forvalues k=1/10 {
    local means `means' m`k'=TABLE_b`k'
    local sds   `sds' s`k'=TABLE_b`k'
    local lows  `lows' l`k'=TABLE_b`k'
    local highs `highs' h`k'=TABLE_b`k'
}
collapse (mean) `means' (sd) `sds' (p2) `lows' (p98) `highs'

* Print the results as LaTeX table rows.
* Coefficients 1-5 are the budget-share regressions and 6-10 the application
* regressions, each in the order PctAgree, Cohen, Fleiss, Gwet, Brennan.
* The Brennan row is printed for budget shares as well, so that both panels
* report the same five measures.
di "Budget shares"
di "Percent Agreement &" round(m1,0.001) " &(" round(s1,0.001) ")& [" round(l1,0.001) ","round(h1,0.001) "]\\"
di "Cohen' kappa &" round(m2,0.001) " &(" round(s2,0.001) ")& [" round(l2,0.001) ","round(h2,0.001) "]\\"
di "Fleiss' AC &" round(m3,0.001) " &(" round(s3,0.001) ")& [" round(l3,0.001) ","round(h3,0.001) "]\\"
di "Gwet' AC &" round(m4,0.001) " &(" round(s4,0.001) ")& [" round(l4,0.001) ","round(h4,0.001) "]\\"
di "Brennan &" round(m5,0.001) " &(" round(s5,0.001) ")& [" round(l5,0.001) ","round(h5,0.001) "]\\"

di "Applications"
di "Percent Agreement &" round(m6,0.001) " &(" round(s6,0.001) ")& [" round(l6,0.001) ","round(h6,0.001) "]\\"
di "Cohen' kappa &" round(m7,0.001) " &(" round(s7,0.001) ")& [" round(l7,0.001) ","round(h7,0.001) "]\\"
di "Fleiss' AC &" round(m8,0.001) " &(" round(s8,0.001) ")& [" round(l8,0.001) ","round(h8,0.001) "]\\"
di "Gwet' AC &" round(m9,0.001) " &(" round(s9,0.001) ")& [" round(l9,0.001) ","round(h9,0.001) "]\\"
di "Brennan &" round(m10,0.001) " &(" round(s10,0.001) ")& [" round(l10,0.001) ","round(h10,0.001) "]\\"


